查看原文
其他

libsgmain反混淆及VM还原

krash 看雪学苑 2022-07-01

本文为看雪论坛精华文章

看雪论坛作者ID:krash



我会先介绍反混淆和还原VM所使用的工具,然后使用它们对Native代码进行反混淆,接着进行VM的还原,并展示还原后的伪码。


一  使用工具介绍


Trace工具

  • 支持不同追踪粒度:

    • 代码覆盖

    • 指令块

    • 指令

  • 支持记录内存访问内容

  • 支持保存CPU上下文

  • 支持追踪系统调用

000131ac 7b39a691ac cmp w8, #0xd w: [NG: 0, ZR: 0, CY: 1, OV: 0], r: [x8: 0000000000000016] libsgmainso-6.5.22.so 000131b0 7b39a691b0 str x0, [sp, #0x18] w: [], r: [x0: 0000007b65ce6820, sp: 0000007b3bfefc20] libsgmainso-6.5.22.so w 0x0000007b3bfefc38 8 .. e8 fe fe 3b 7b 00 00 00 20 68 ce 65 7b 00 00 00 .........h.e.... libsgmainso-6.5.22.so 000131b4 7b39a691b4 b.cs 0x7b39a691bc w: [], r: [CY: 1] libsgmainso-6.5.22.so 000131bc 7b39a691bc mov w0, #0x20 w: [x0: 0000000000000020], r: [] libsgmainso-6.5.22.so 000131c0 7b39a691c0 str wzr, [sp, #0x24] w: [], r: [sp: 0000007b3bfefc20] libsgmainso-6.5.22.so w 0x0000007b3bfefc44 4 .. df 00 00 00 00 00 00 00 1b 0b 19 3c 6b db 19 e9 ............k... libsgmainso-6.5.22.so 000131c4 7b39a691c4 bl 0x7b39a611a0 w: [x30: 0000007b39a691c8], r: [] libsgmainso-6.5.22.so 0000b1a0 7b39a611a0 adrp x16, 0x7b39b7a000 w: [x16: 0000007b39b7a000], r: [] libsgmainso-6.5.22.so 0000b1a4 7b39a611a4 ldr x17, [x16, #0x8a0] w: [x17: 0000007be97e0934], r: [x16: 0000007b39b7a000] libsgmainso-6.5.22.so r 0x0000007b39b7a8a0 8 .. 34 09 7e e9 7b 00 00 00 c4 5b 83 e9 7b 00 00 00 4............... libsgmainso-6.5.22.so 0000b1a8 7b39a611a8 add x16, x16, #0x8a0 w: [x16: 0000007b39b7a8a0], r: [x16: 0000007b39b7a000] libsgmainso-6.5.22.so 0000b1ac 7b39a611ac br x17 w: [], r: [x17: 0000007be97e0934] libsgmainso-6.5.22.so 0001c934 7be97e0934 adrp x8, 0x7be98b7000 w: [x8: 0000007be98b7000], r: [] libc.so malloc + 0000 0001c938 7be97e0938 add x8, x8, #0x0 w: [x8: 0000007be98b7000], r: [x8: 0000007be98b7000] libc.so malloc + 0004 0001c93c 7be97e093c ldr x1, [x8, #0x78] w: [x1: 0000000000000000], r: [x8: 0000007be98b7000] libc.so malloc + 0008 r 0x0000007be98b7078 8 .. xx xx xx xx xx xx xx xx 00 00 00 00 00 00 00 00 --------........ 
libc.so malloc + 0008 0001c940 7be97e0940 cbnz x1, 0x7be97e0948 w: [], r: [x1: 0000000000000000] libc.so malloc + 000c 0001c944 7be97e0944 b 0x7be986277c w: [], r: [] libc.so malloc + 0010 0009e77c 7be986277c sub sp, sp, #0x90 w: [sp: 0000007b3bfefb90], r: [sp: 0000007b3bfefc20] libc.so 0009e780 7be9862780 str x27, [sp, #0x30] w: [], r: [x27: 0000007b3bff3588, sp: 0000007b3bfefb90] libc.so w 0x0000007b3bfefbc0 8 .. 88 35 ff 3b 7b 00 00 00 xx xx xx xx xx xx xx xx .5......-------- libc.so

以上是一个粒度最小的trace所展现的信息,它包含了:
  • 相对地址

  • 绝对地址

  • 汇编指令

  • 读写的寄存器及内容

  • 访问内存地址及内容

  • 地址所在模块

  • 地址附近符号


二进制分析框架

* 完全使用Python实现,方便配合现有二进制分析平台进行分析。
* 内部使用SSA形式, 树型结构的IR。寄存器,栈变量均为SSA形式;内存也是SSA,即MemorySSA。
* 支持将Ghidra p-code转换成我们自己的IR。这个特性可以让分析框架支持大量指令集和指令。
* 内部有维护NON-SSA和SSA IR的对应关系。实现这个特性是为了方便反混淆,因为在修改CFG的时候维护IR的SSA形式非常困难,不如直接修改原始的NON-SSA的CFG,反混淆后再重建SSA IR简单。
* 常见的编译器分析:支配关系,控制依赖,活跃分析,别名分析,区域分析,循环分析,值集分析(VSA)。
* 常见的编译器优化:死代码消除,常量传播,常量折叠,稀疏常数条件传播(SCCP)。
* 反编译器(结构化分析)。



二  反混淆


这里选择去年某个版本的手淘,还原10401命令算法过程遇到的混淆,libsgmain版本为: 6.5.22,md5: 875d423e9df06024bfddeed4322a4de5,模块基址:0x7b3bd56000。


二进制代码反混淆


该版本的doCommandNative位于00013124,代码如下:
0000000000013124 SUB SP, SP, #0x90
...
000000000001317C LDRSW X3, =0xFFFFFF51
0000000000013180 LDRSW X25, [X9]
0000000000013184 ADD X3, X3, X25
0000000000013188 ADD X11, X11, X3
000000000001318C MOV X8, #0x16
0000000000013190 BR X11

可以看到,X11是动态计算出来的,当进一步分析之后发现,这个地址还依赖一个静态变量,只使用静态分析难以分析出这个跳转目标。

这类间接跳转混淆可以很好阻止逆向工具的静态分析。有时静态分析难以解决的问题,使用动态分析则不是个问题。
0001317c 7b3bd6917c ldrsw x3, 0x7b3bd69194
00013180 7b3bd69180 ldrsw x25, [x9]
00013184 7b3bd69184 add x3, x3, x25
00013188 7b3bd69188 add x11, x11, x3
0001318c 7b3bd6918c mov x8, #0x16
00013190 7b3bd69190 br x11
000131ac 7b3bd691ac cmp w8, #0xd

通过trace我们可以清楚看到,br的跳转目标是0x000131ac。对于这类混淆,我们只需要从trace中直接重建CFG即可完成反混淆。从trace中重建CFG算法跟直接从普通的二进制构建CFG差别不大。

我的实现算法主要参考了Binary Ninja的这篇博客:《Architecture Agnostic Function Detection In Binaries》。

简单来说,先从Trace中创建ICFG,删除其中的Call,Ret边,图中的每一个弱连通分量就对应一个函数CFG。

有了函数CFG之后,就可以进行反编译了,下面是我反混淆后的代码:
var_60:8 = x28;var_58:8 = x27;var_50:8 = x26;var_48:8 = x25;var_40:8 = x24;var_38:8 = x23;var_30:8 = x22;var_28:8 = x21;var_20:8 = x20;var_18:8 = x19;var_10:8 = x29;var_8:8 = x30;x27_1 = tpidr_el0;var_68:8 = [x27_1 + 0x28]:8;w20_1 = w2;x25_1 = sx.64([0x7b3be97450]:1 + 0xb6);switch ( 0x7b3bd6917c + sx.64([0x7b3bd69194]:4) + x25_1 ) { case 0x7b3bd691bc: var_6c:4 = 0x0; x22_1 = sub_7b3bd611a0 ( 0x20 ); w13_2 = x22_1 == 0x0; w23_1 = (sx.64(w20_1) * 0x68db8bad >>s 0x2c).w + (sx.64(w20_1) * 0x68db8bad >> 0x3f).w; x9_9 = sx.64(w20_1 - w23_1 * 0x2710) * 0x51eb851f; w15_2 = 0xea; w19_1 = 0x72; x14_2 = sx.64(w20_1) * 0x51eb851f; w24_1 = (x9_9 >>s 0x25).w + (x9_9 >> 0x3f).w; while ( 0x1 ) { x26_1 = x25_1; if (w19_1 == 0x92) { if ([x27_1 + 0x28]:8 != var_68:8) { } return x25_1; } while ( w15_2 != 0x0 ) { if ((w13_2 & 0x1) == 0x0) { x2_2 = zx.64(w20_1 - ((x14_2 >>s 0x25).w + (x14_2 >> 0x3f).w) * 0x64); t27_1:8 = x22_1; [t27_1:8]:8 = 0x0; [t27_1:8 + 0x8]:8 = 0x0; t27_2:8 = x22_1; [t27_2:8]:4 = w23_1; [t27_2:8 + 0x4]:4 = w24_1; [x22_1 + 0x8]:4 = x2_2.w2; [x22_1 + 0x10]:8 = x0; [x22_1 + 0x18]:8 = x3; x14_2 = x14_2; w13_2 = w13_2; x26_1 = sub_7b3bd65c18 ( zx.64(w23_1), zx.64(w24_1), x2_2, 0x1, x22_1, &var_6c:4 ); w15_2 = 0x0; continue; } } sub_7b3bd61290 ( x22_1 ); x14_2 = x14_2; w24_1 = w24_1; x27_1 = x27_1; w23_1 = w23_1; w15_2 = w15_2; w20_1 = w20_1; x22_1 = x22_1; w13_2 = w13_2; if (x26_1 != 0x0) { w19_1 = 0x92; x25_1 = x26_1; continue; } } break;}

我在这里使用的是switch pc的方法,将"br x11"目标作为switch的一个case来表示这个间接跳转。
switch(pc) { case target: break;}

从上面伪码我们很容易看出阿里间接跳转的混淆模式:
x25_1 = sx.64([0x7b3be97450]:1 + 0xb6);
switch ( 0x7b3bd6917c + sx.64([0x7b3bd69194]:4) + x25_1 ) // 动态计算跳转目标,跳转目标依赖从bss段0x141450中读取一个字节数据

识别并删除这类模式的跳转可以获得更干净的伪码:
var_60:8 = x28;...var_8:8 = x30;x27_1 = tpidr_el0;var_68:8 = [x27_1 + 0x28]:8;w20_1 = w2;x25_1 = sx.64([0x7b3be97450]:1 + 0xb6);pc_1 = 0x7b3bd6917c + sx.64([0x7b3bd69194]:4) + x25_1;var_6c:4 = 0x0;x22_1 = sub_7b3bd611a0 ( 0x20 ); // mallocw13_2 = x22_1 == 0x0;w23_1 = (sx.64(w20_1) * 0x68db8bad >>s 0x2c).w + (sx.64(w20_1) * 0x68db8bad >> 0x3f).w;x9_9 = sx.64(w20_1 - w23_1 * 0x2710) * 0x51eb851f;w15_2 = 0xea;w19_1 = 0x72;x14_2 = sx.64(w20_1) * 0x51eb851f;w24_1 = (x9_9 >>s 0x25).w + (x9_9 >> 0x3f).w;while ( 0x1 ) { x26_1 = x25_1; if (w19_1 == 0x92) { if ([x27_1 + 0x28]:8 != var_68:8) { } return x25_1; } while ( w15_2 != 0x0 ) { if ((w13_2 & 0x1) == 0x0) { x2_2 = zx.64(w20_1 - ((x14_2 >>s 0x25).w + (x14_2 >> 0x3f).w) * 0x64); t27_1:8 = x22_1; [t27_1:8]:8 = 0x0; [t27_1:8 + 0x8]:8 = 0x0; t27_2:8 = x22_1; [t27_2:8]:4 = w23_1; [t27_2:8 + 0x4]:4 = w24_1; [x22_1 + 0x8]:4 = x2_2.w2; [x22_1 + 0x10]:8 = x0; [x22_1 + 0x18]:8 = x3; x14_2 = x14_2; w13_2 = w13_2; x26_1 = sub_7b3bd65c18 ( zx.64(w23_1), zx.64(w24_1), x2_2, 0x1, x22_1, &var_6c:4 ); w15_2 = 0x0; continue; } } sub_7b3bd61290 ( x22_1 ); // free x14_2 = x14_2; w24_1 = w24_1; x27_1 = x27_1; w23_1 = w23_1; w15_2 = w15_2; w20_1 = w20_1; x22_1 = x22_1; w13_2 = w13_2; if (x26_1 != 0x0) { w19_1 = 0x92; x25_1 = x26_1; continue; }}

我们也可以使用这个分析结果,对这个间接跳转进行patch来修复CFG,这样就可以使用熟悉的静态工具进行下一步分析了。

这个函数其实是个非常好的反编译器测试用例,可以测试反编译器的:
  • 除法,模运算的化简能力

  • 函数调用参数,返回值识别准确性

  • 反编译器优化能力

  • 循环结构能力


在不对反编译器进行干预的情况下,我测试了5个反编译器,它们分别是:IDA,Ghidra,Binary Ninja,Jeb,RetDec,其中只有Ghidra能正确反编译这个函数。对反编译有兴趣的朋友可以深入研究这个函数。
 
到这里,我们已经完成间接跳转的反混淆。在它之后还有流程平坦混淆等着我们。

流程平坦反混淆


传统的流程平坦反混淆可以参考我之前的帖子。我这次使用一种仅依赖编译器优化,不必区分混淆框架代码,真实代码块,真实代码块间的关系的反混淆方案:直接反编译trace。

其本质是对特定的输入展开混淆函数中的所有循环,对展开后的代码应用编译器优化去除混淆。考察下面trace代码片段:
00013208 7b3bd69208 mov w15, #0xea
0001320c 7b3bd6920c mov w8, #0xb5 ; 对状态变量w8赋值
00013210 7b3bd69210 mov w28, #0x1b ; 对状态变量w28赋值
00013214 7b3bd69214 mov w19, #0x72 ; 对状态变量w19赋值,所以这个函数包含至少三个CFF?
00013218 7b3bd69218 smull x14, w20, w11
0001321c 7b3bd6921c add w24, w9, w10
00013220 7b3bd69220 mov w10, #0x64
00013224 7b3bd69224 b 0x7b3bd69238
00013238 7b3bd69238 cmp w8, #0xda ; w8 = 0xb5
0001323c 7b3bd6923c b.eq 0x7b3bd6935c ; 在trace中,这里会是个常数条件,恒不相等,可直接修改为,goto 7b3bd69240
00013240 7b3bd69240 mov x26, x25
00013244 7b3bd69244 cmp w28, #0x1b ; w28 = 0x1b
00013248 7b3bd69248 b.eq 0x7b3bd69264 ; 常数条件,恒成立, goto 0x7b3bd69264
00013264 7b3bd69264 cmp w19, #0x92 ; w19 = 0x72
00013268 7b3bd69268 b.eq 0x7b3bd6936c ; 常数条件,不成立, goto 0x7b3bd6926c
0001326c 7b3bd6926c b 0x7b3bd692d0

在反编译器中反编译这个函数的某次trace,反编译器会自动识别以上常数条件,执行优化后的代码效果:
var_60:8 = x28;...var_8:8 = x30;x27_1 = tpidr_el0;var_68:8 = [x27_1 + 0x28]:8;w20_1 = w2;pc_1 = 0x7b3bd6917c + sx.64([0x7b3bd69194]:4) + sx.64([0x7b3be97450]:1 + 0xb6);var_6c:4 = 0x0;x22_1 = sub_7b3bd611a0 ( 0x20 ); // 调用mallocw23_1 = (sx.64(w20_1) * 0x68db8bad >>s 0x2c).w + (sx.64(w20_1) * 0x68db8bad >> 0x3f).w;x9_9 = sx.64(w20_1 - w23_1 * 0x2710) * 0x51eb851f;x14_2 = sx.64(w20_1) * 0x51eb851f;w24_1 = (x9_9 >>s 0x25).w + (x9_9 >> 0x3f).w;if (x22_1 == 0x0 == 0x0) { // malloc返回的指针 != null x2_2 = zx.64(w20_1 - ((x14_2 >>s 0x25).w + (x14_2 >> 0x3f).w) * 0x64); t27_1:8 = x22_1; [t27_1:8]:8 = 0x0; [t27_1:8 + 0x8]:8 = 0x0; t27_2:8 = x22_1; [t27_2:8]:4 = w23_1; [t27_2:8 + 0x4]:4 = w24_1; [x22_1 + 0x8]:4 = x2_2.w2; [x22_1 + 0x10]:8 = x0; [x22_1 + 0x18]:8 = x3; x26_3 = sub_7b3bd65c18 ( zx.64(w23_1), zx.64(w24_1), x2_2, 0x1, x22_1, &var_6c:4 ); // 调用 sub_fc18 sub_7b3bd61290 ( x22_1 ); // 调用free if (x26_3 != 0x0) { if ([x27_1 + 0x28]:8 != var_68:8) { // 该分支没有执行 // stack_chk_fail() } return x26_3; // 返回函数sub_fc18的调用结果 }}

以上代码即是doCommandNative反混淆之后的最终代码,可以看到流程平坦混淆中对状态变量的更新和测试均已被优化掉,代码逻辑也已经比较清晰了。

另外一处包含循环的是LiteVM的加密解释器,这部分代码不便贴出,这里使用看雪培训ollvm8中的sub_fcb4函数的反混淆效果代替。

这个函数代码较长,放在文章最后。


三  VM还原


阿里的这个VM我猜测是使用LLVM后端实现的,并没有经过二进制->VM的转换,而是由源码->LLVM IR->LLVM LiteVM后端直接编译出LiteVM指令的二进制文件。

PC上VM防护大都是将编译好的二进制的指令集转换成自定义的指令集,它对应的还原方式也都是尝试将VM指令还原到原始指令。

对于阿里这种没有原始指令集的一类VM,貌似还没有相关的还原文章。

我采用的是最笨的方法,人肉分析LiteVM所有指令的编码和语义,将指令转成我们反编译器的IR,使用反编译器生成高级伪码。

LiteVM架构


LiteVM的整体架构如下:

LiteVM的寄存器:



它的一些特性:
  • 64位寄存器结构虚拟机

  • 固定4字节指令长度。指令集类似MIPS:条件跳转不使用条件码;只有少量opcode,更具体的功能通过funct码区分。

  • 很多指令有32,64位两个版本。

  • 很多指令有寄存器,立即数两个版本。

  • 完备的FFI实现,可调用虚拟化模块外的虚拟化代码或者是本地代码。

  • 具备条件执行能力。


LiteVM解释器实现


LiteVM的解释器入口位于0xdfe0c,代码结构如下:
x27_1 = tpidr_el0;w26_1 = w2;var_68_1:8 = [x27_1 + 0x28]:8;pc_1 = 0x7b3be35e4c + ~(sx.64([0x7b3be35e70]:4) + 0x65 - 0xdd) + 0xfc;x19_1 = x0;if (w26_1 u> 0x3f == 0x0) { ... if ([x24_2]:4 == 0x0) { ... if ([x28_1 + 0x98]:4 == 0x0) { // 模块是否已经解码 if (0x2 u<= [x28_1 + 0x58]:4) { // 模块至少包含两条指令 x20_1 = var_78_1:8; w24_3 = 0x1; while ( 0x1 ) { // 解码模块中的所有指令 x9_11 = [x28_1 + 0x48]:8 + (zx.64(w24_3) << 0x2); x8_13 = zx.64([x9_11]:4); // 取得32位指令 w8_13 = x8_13.w8; if ((~w8_13 & 0x1f) != 0x0) { //指令是否加密 // 未加密 x10_8 = zx.64(w8_13 & 0x1f); // opcode = insn & 0x1f, insn的低5位 if (x10_8.w10 u> 0x14) {LABEL_7b3be36370:0: // 取下条指令 w24_3 = w24_3 + 0x1; x20_1 = x20_1 + 0x10; if ([x28_1 + 0x58]:4 u<= w24_3) { break; } else { continue; } } switch ( sx.64([0x7b3be50570 + (x10_8 << 0x2)]:4) + 0x7b3be50570 ) { // switch (opcode) case 0x7b3be35f44: // case 0, 算数指令 reg op reg w9_30 = (w8_13 >> 0x5 | w8_13 << 0x1b) & 0xf800000f & 0xf; [x20_1 + 0x4]:1 = w9_30.b; // 功能码 [x20_1 + 0x5]:1 = ((w8_13 >> 0x9 | w8_13 << 0x17) & 0xff80001f & 0x1f).b; // 源操作数1 [x20_1 + 0x6]:1 = ((w8_13 >> 0xe | w8_13 << 0x12) & 0xfffc001f & 0x1f).b; // 源操作数2 [x20_1 + 0x7]:1 = ((w8_13 >> 0x13 | w8_13 << 0xd) & 0xffffe01f & 0x1f).b; // 目的操作数 if (w9_30 u<= 0x3) { w8_15 = 0x20; } else { if (w9_30 u<= 0x7) { w8_15 = 0x21; } else { if (w9_30 == 0xf) { goto @LABEL_7b3be36370:0; } w8_15 = w9_30 + 0x55; } } break; case [0x7b3be35f74...0x7b3be36228]: ... break; } [x20_1]:4 = w8_15; goto @LABEL_7b3be36370:0; } else { // 指令被加密 ... } } } [x28_1 + 0x98]:4 = 0x1; // 设置模块解码标记 ... } ... // 进入使用computed-goto实现的解释器,缺少参数x5 = 0x7b3be96ad0(0x140ad0, vm handles) sub_7b3be47a24 ( x19_1, x23_2, var_78_1:8, zx.64((zx.64((zx.64(w1)).w - 0x4)).w >> 0x2), x21_1 ); ...

LiteVM为了提升解释性能,避免每次执行指令都需要重新解码一次,会先将模块所有指令提前解码成以下结构:
struct DecodedInstruction { u4 opcode; // 操作码 off: 0 u1 func; // 功能码 off: 4 u1 rs; // 源操作数1 off: 5 u1 rt; // 源操作数2,目的操作数 off: 6 u1 rd; // 目的操作数 off: 7 s8 imm; // 有符号立即数 off: 8};

同时使用一种叫Threaded Interpretation的解释器,解释器位于0xf1a24。

关于该解释器原理可以参考这篇文章
Computed goto for efficient dispatch tables

为了修复这个解释器的CFG,我使用类似对间接跳转的处理方法,把其他vm handlers的地址硬加到第一个间接跳转后面:
000f1b58 7b3be47b58 br x8

反编译后的代码结构如下:
x26_1 = x0;x28_1 = x1;x19_1 = x2;x27_1 = x19_1;x24_1 = x4;x20_1 = x5;[x24_1 + 0x78]:8 = x27_1;x27_2 = x27_1 + (x3 << 0x4);x8_2 = [x20_1 + (zx.64([x27_2]:1) << 0x3)]:8;[x28_1 + 0x10]:8 = x27_2;switch ( x8_2 ) { case 0xf1ab8:LABEL_f1ab8:0: x8_324 = [x20_1 + (zx.64([x27_2]:1) << 0x3)]:8; [x28_1 + 0x10]:8 = x27_2; goto [x8_324]; case 0xf1ad8: ... goto [x8_322]; case ... ... // 约160个case

LiteVM的handler表位于0x140ad0,handler都没有混淆,还是比较好分析的,有兴趣的朋友请自行分析。

我这里只分析与函数调用相关的两个handler,以了解反编译器所需的调用约定信息。
case 0xf1ad8: // 进入函数,完成栈空间分配,保存寄存器,复制参数到局部寄存器 x11_6 = [x24_1 + 0x70]:8; // 0x70 / 8 = 14 => x14(sp) [x24_1 + 0x70]:8 = x11_6 + [x27_2 + 0x8]:8; // x14 = x14 + DecodedInstruction.imm ; 开辟函数栈帧 t1_62:8 = x24_1 + 0x80; // x16 x10_23 = [t1_62:8 + 0x8]:8; // x17 t16_4:8 = x11_6; [t16_4:8]:8 = [t1_62:8]:8; // [x14] = x16;这里保存到了caller的栈帧? [t16_4:8 + 0x8]:8 = x10_23; // [x14 + 8] = x17 ; 保存x16~x17 t1_63:8 = x24_1 + 0x90; // x18 x10_24 = [t1_63:8 + 0x8]:8; // x19 t1_64:8 = x11_6 + 0x10; [t1_64:8]:8 = [t1_63:8]:8; // [x14 + 0x10] = x18 [t1_64:8 + 0x8]:8 = x10_24; // [x14 + 0x18] = x19 ; 保存x18-x19 t1_65:8 = x24_1 + 0xa0; x10_25 = [t1_65:8 + 0x8]:8; t1_66:8 = x11_6 + 0x20; [t1_66:8]:8 = [t1_65:8]:8; // [x14 + 0x20] = x20 [t1_66:8 + 0x8]:8 = x10_25; // [x14 + 0x28] = x21 ; 保存x20~x21 t1_67:8 = x24_1 + 0xb0; x10_26 = [t1_67:8 + 0x8]:8; t1_68:8 = x11_6 + 0x30; [t1_68:8]:8 = [t1_67:8]:8; // [x14 + 0x30] = x22 [t1_68:8 + 0x8]:8 = x10_26; // [x14 + 0x38] = x23 ; 保存x22~x23 t1_69:8 = x24_1 + 0xc0; x10_27 = [t1_69:8 + 0x8]:8; t1_70:8 = x11_6 + 0x40; [t1_70:8]:8 = [t1_69:8]:8; // [x14 + 0x40] = x24 [t1_70:8 + 0x8]:8 = x10_27; // [x14 + 0x48] = x25 ; 保存x24~x25 t1_71:8 = x24_1 + 0xd0; x10_28 = [t1_71:8 + 0x8]:8; t1_72:8 = x11_6 + 0x50; [t1_72:8]:8 = [t1_71:8]:8; // [x14 + 0x50] = x26 [t1_72:8 + 0x8]:8 = x10_28; // [x14 + 0x58] = x27 ; 保存x26~x27 t1_73:8 = x24_1 + 0xe0; x10_29 = [t1_73:8 + 0x8]:8; t1_74:8 = x11_6 + 0x60; [t1_74:8]:8 = [t1_73:8]:8; // [x14 + 0x60] = x28 [t1_74:8 + 0x8]:8 = x10_29; // [x14 + 0x68] = x29 ; 保存x28~x29 t1_75:8 = x24_1 + 0xf0; x10_30 = [t1_75:8 + 0x8]:8; t1_76:8 = x11_6 + 0x70; [t1_76:8]:8 = [t1_75:8]:8; // [x14 + 0x70] = x30 [t1_76:8 + 0x8]:8 = x10_30; // [x14 + 0x78] = x31 ; 保存x30~x31 t1_77:8 = x24_1 + 0x40; x10_31 = [t1_77:8 + 0x8]:8; // x9 t1_78:8 = x24_1 + 0xc0; // 0xc0 / 8 = 24 => x24 [t1_78:8]:8 = [t1_77:8]:8; // x24 = x8 ; x8在函数内未定义使用,故为参数。 [t1_78:8 + 0x8]:8 = x10_31; // x25 = x9 ; 将参数复制到局部变量 x24 = x8 = arg_1, x25 = x9 = arg_2 t1_79:8 = x24_1 + 0x50; // 
x10 x10_32 = [t1_79:8 + 0x8]:8; // x11 t1_80:8 = x24_1 + 0xd0; [t1_80:8]:8 = [t1_79:8]:8; // x26 = x10 [t1_80:8 + 0x8]:8 = x10_32; // x27 = x11 ; 复制arg_3, arg_4 t1_81:8 = x24_1 + 0x60; // x12 x10_33 = [t1_81:8 + 0x8]:8; // x13 t1_82:8 = x24_1 + 0xe0; [t1_82:8]:8 = [t1_81:8]:8; // x28 = x12 [t1_82:8 + 0x8]:8 = x10_33;// x29 = x13 ; 复制arg_5, arg_6 [x24_1 + 0xf0]:8 = x11_6; // x30 = x14 ; fp = 原始sp [x24_1 + 0xf8]:8 = [x24_1 + 0x78]:8; // x31 = x15 ; x31 = 返回地址 x27_214 = x27_2 + 0x10; x8_322 = [x20_1 + (zx.64([x27_214]:1) << 0x3)]:8; [x28_1 + 0x10]:8 = x27_214; goto [x8_322];

使用trace快速定位到调用外部函数的实现
000df714 7b3be35714 ldr x8, [sp, #0x68] r 0x0000007b3c5fe438 8 .. f8 8b fe 3f 7b 00 00 00 34 09 7e e9 7b 00 00 00 ........4....... libsgmainso-6.5.22.so 000df718 7b3be35718 stp x24, x28, [sp, #0x20] w 0x0000007b3c5fe3f0 16 .. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ libsgmainso-6.5.22.so 000df71c 7b3be3571c stp x21, x23, [sp, #0x10] w 0x0000007b3c5fe3e0 16 .. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ libsgmainso-6.5.22.so 000df720 7b3be35720 blr x8 ........................................................................................... libsgmainso-6.5.22.so 0001c934 7be97e0934 adrp x8, 0x7be98b7000 ........................................................................................... libc.so malloc + 0000 0001c938 7be97e0938 add x8, x8, #0x0 ........................................................................................... libc.so malloc + 0004
000df714 7b3be35714 ldr x8, [sp, #0x68] r 0x0000007b3c5fdd58 8 .. 78 ba a3 5d 7b 00 00 00 d0 25 7e e9 7b 00 00 00 x............... libsgmainso-6.5.22.so 000df718 7b3be35718 stp x24, x28, [sp, #0x20] w 0x0000007b3c5fdd10 16 .. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ libsgmainso-6.5.22.so 000df71c 7b3be3571c stp x21, x23, [sp, #0x10] w 0x0000007b3c5fdd00 16 .. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ libsgmainso-6.5.22.so 000df720 7b3be35720 blr x8 ........................................................................................... libsgmainso-6.5.22.so 0001e5d0 7be97e25d0 and x4, x0, #0xfff ........................................................................................... libc.so strlen + 0000 0001e5d4 7be97e25d4 orr x8, xzr, #0x101010101010101 ........................................................................................... libc.so strlen + 0004

通过上面两个trace片段,我们基本可以确定VM使用以下指令跳转到外部函数:
000df720 7b3be35720 blr x8                                ........................................................................................... libsgmainso-6.5.22.so

0x000df720所在的函数是0xdf4e4,往上回溯定位到vm call的handler,按顺序分析调用链:
case 0xf1cbc: // 函数调用 // x24_1 = GPRs x25_3 = x28_1 + 0x38; t1_18:8 = x24_1 + 0x40; // x8 x10_2 = [t1_18:8 + 0x8]:8; // x9 t1_19:8 = x25_3 + 0x40; [t1_19:8]:8 = [t1_18:8]:8; // arg_1 = [x28_1 + 0x78]:8 = x8, 注意这个0x78偏移 [t1_19:8 + 0x8]:8 = x10_2; // arg_2 = [x28_1 + 0x80]:8 = x9,将x8,x9复制到VM传参结构 t1_20:8 = x24_1 + 0x50; x10_3 = [t1_20:8 + 0x8]:8; t1_21:8 = x25_3 + 0x50; [t1_21:8]:8 = [t1_20:8]:8; [t1_21:8 + 0x8]:8 = x10_3; t1_22:8 = x24_1 + 0x60; x10_4 = [t1_22:8 + 0x8]:8; t1_23:8 = x25_3 + 0x60; [t1_23:8]:8 = [t1_22:8]:8; [t1_23:8 + 0x8]:8 = x10_4; t1_24:8 = x24_1 + 0x70; x10_5 = [t1_24:8 + 0x8]:8; t1_25:8 = x25_3 + 0x70; [t1_25:8]:8 = [t1_24:8]:8; [t1_25:8 + 0x8]:8 = x10_5; // 完成[arg_1,arg_2,arg_3,arg_4,arg_5,arg_6,sp,lr] <= [x8,x9,x10,x11,x12,x13,x14,x15] [x25_3 + 0xf0]:8 = [x24_1 + 0xf0]:8; // x30 sub_df134 ( x26_1, x28_1, [x27_2 + 0x8]:8 ); // 执行函数调用 x27_205 = [x24_1 + 0x78]:8; // x15,保存的返回地址 [x24_1 + 0x40]:8 = [x25_3 + 0x40]:8; // x8: 外部调用结果 <- [x28_1 + 0x78]:8 x27_206 = x27_205 + 0x10; // 下条指令 x8_308 = [x20_1 + (zx.64([x27_206]:1) << 0x3)]:8; [x28_1 + 0x10]:8 = x27_206; goto [x8_308];
FUNC: 0x000df4e4x8_1 = tpidr_el0;x20_1 = x1;var_68_1:8 = [x8_1 + 0x28]:8;pc_1 = 0x7b3be35528 + (~(sx.64([0x7b3be3554c]:4)) + 0xa - 0x1b ^ 0xfa);x8_7 = x20_1 + 0x0 + 0x78; // 这个0x78有没有眼熟,这个就是上上层函数调用[x28_1 + 0x78]中保存的x8x9_9 = [x8_7]:8; // 函数调用第1个参数pc_2 = 0x7b3be355d4 + (~~(sx.64([0x7b3be355f8]:4)) ^ 0x14);x19_1 = [x8_7 + 0x8]:8; // 第2t0_9:8 = x20_1 + 0x88;x22_1 = [t0_9:8]:8; // 第3x26_1 = [t0_9:8 + 0x8]:8; // 第4var_80_2:8 = [x20_1 + 0x98]:8; // 第5var_88_2:8 = [x20_1 + 0xa0]:8; // 第6var_78_2:8 = sub_7b3bd60970 ( x20_1, 0x6, x2, 0x7b3be355d4 + (~~(sx.64([0x7b3be355f8]:4)) ^ 0x14), 0xef, ~~(sx.64([0x7b3be355f8]:4)) ^ 0x14 ); // 从栈上取第7个参数x2_3 = sub_7b3bd60970 ( x20_1, 0x7 ); // 第8x5_16 = ~~(sx.64([0x7b3be355f8]:4)) ^ 0x14;var_b0_1:8 = sub_7b3bd60970 ( x20_1, 0x8, x2_3, 0x7b3be355d4 + x5_16, 0xef, x5_16 ); // 以下代码依次取第9~第15个参数var_b8_1:8 = sub_7b3bd60970 ( x20_1, 0x9 );x21_4 = sub_7b3bd60970 ( x20_1, 0xa ); x23_2 = sub_7b3bd60970 ( x20_1, 0xb );x24_2 = sub_7b3bd60970 ( x20_1, 0xc );x28_2 = sub_7b3bd60970 ( x20_1, 0xd );x25_2 = sub_7b3bd60970 ( x20_1, 0xe );// 调用外部函数,如strlen,最多支持16个参数x0_22 = x2 ( x9_9, x19_1, x22_1, x26_1, var_80_2:8, var_88_2:8, var_78_2:8, x2_3, var_b0_1:8, var_b8_1:8, x21_4, x23_2, x24_2, x28_2, x25_2, sub_7b3bd60970 ( x20_1, 0xf ) /* 16 */ );[x8_7]:8 = x0_22; //保存调用结果if ([x8_1 + 0x28]:8 != var_68_1:8) {// 0x7b3be35760}return x0_22;======================================================================FUN: 0x000dd9c4, 0x7b3bd60970if (w1 s> 0x5) { // 参数个数大于5,从栈上取 return [[x0 + 0xa8]:8 + sx.64((w1 << 0x3) - 0x30) + 0xb0]:8;}// 否则从寄存器return [x0 + (sx.64(w1) << 0x3) + 0x78]:8;

了解调用约定之后,我们就可以识别函数中使用的参数、栈上变量,函数调用参数。

LiteVM指令加密及混淆解释器实现


这是我分析10401命令时遇到的最后一道防护,为了避免不必要的麻烦,这里不再详细分析它的实现。

它是vm handler表中最后一个handler,使用了前面遇到过的机器码混淆。它的CFG看起来是这样:


在IDA中反编译后大概有8000+行代码。


解释器实现框架大体是这样的:
key = initial key
while 还有未执行的指令:
    opcode, transformation <- 使用key解密需要执行的加密指令
    operands <- 使用transformation信息对operands进行变换
    handle(opcode, operands) // 解释该指令
    key <- 对key进行变换

如果要分析这个解释器,建议先完整分析未加密指令的编码和指令的语义,会事半功倍。

效果展示


下面几个是还原的VM函数,sub_538是执行10401命令第一个运行的函数
//----- (00000538) ----------------------------------------------------x24_1 = x8;x25_1 = x9;if (x24_1 != 0x0 && x25_1 != 0x0) { x26_2 = [x24_1]:8; if (x26_2 != 0x0 && [x26_2 + 0x8]:4 != 0x0 && [x24_1 + 0x18]:4 u< 0x15) { x26_5 = sub_12ba0 (); if (x26_5 != 0x0) { [x25_1]:4 = wzr; var_30:8 = [x24_1]:8; var_28:8 = x26_5; var_20:8 = x25_1; var_18:8 = [x24_1 + 0x8]:8; var_10:8 = [x24_1 + 0x10]:8; var_8:4 = [x24_1 + 0x1c]:4; x27_9 = [0x730fa29ce8 + (zx.64([x24_1 + 0x18]:4) << 0x3)]:8; if (x27_9 == 0x0) { [x25_1]:4 = 0x62; x27_2 = 0x0; goto @LABEL_5880:0; } x27_9 ( &var_30:8 ); x27_2 = 0x0; if ([x25_1]:4 == 0x0) { if ([x26_5 + 0x8]:4 == 0x0) { [x25_1]:4 = 0x63; goto @LABEL_5880:0; } x8_5 = sub_15f80 ( x26_5 ); if (x8_5 == 0x0) { [x25_1]:4 = 0x2; goto @LABEL_5880:0; } [x25_1]:4 = 0x63; x27_2 = x8_5; if ([x8_5 + 0x8]:4 == 0x0) { goto @LABEL_5880:0; } } else {LABEL_5880:0: var_38:8 = xzr; var_40:8 = xzr; sub_1a8d0 ( &var_40:8, 0x730fa29d90, zx.64([x24_1 + 0x18]:4) ); var_80:4 = 0x1; var_7c:4 = [x25_1]:4; var_78:8 = 0x186a1; var_70:4 = 0x0; var_68:4 = 0x73; var_6c:4 = 0xfa29d93; x25_9 = [x24_1 + 0x8]:8; var_64:4 = x25_9.w25; var_60:4 = (x25_9 >> 0x20).w; x24_4 = [x24_1 + 0x10]:8; var_5c:4 = x24_4.w24; var_58:4 = (x24_4 >> 0x20).w; var_50:4 = (&arg_ffffffff).w; var_54:4 = (&var_40:8).w; var_48:4 = 0x0; var_4c:4 = 0x0; var_a8:8 = 0x1; var_a0:8 = 0xe; var_98:8 = 0x5; var_90:8 = xzr; var_88:8 = &var_80:4; sub_1e150 ( &var_a8:8, 0x0 ); } sub_13880 ( x26_5 ); return x27_2; } [x25_1]:4 = 0x2; x27_2 = 0x0; return x27_2; }}x27_2 = 0x0;if (x25_1 != 0x0) { [x25_1]:4 = 0x1;}return x27_2; //----- (000012ba) ----------------------------------------------------x26_2 = sub_1de10 ( 0x68 );x24_2 = 0x0;if (x26_2 == 0x0) { return x24_2;}sub_1de20 ( x26_2 + 0x8, 0x0, 0x60 );[x26_2 + 0xc]:4 = 0x64;[x26_2]:8 = xzr;x25_3 = sub_1de10 ( 0x64 );if (x25_3 != 0x0) { sub_1de20 ( x25_3, 0x0, 0x64 ); [x26_2]:8 = x25_3; [x26_2 + 0x8]:4 = wzr; var_4:4 = wzr; [x26_2 + 0x10]:8 = sub_1e200 ( 0x0, &var_4:4 ); 
[x26_2 + 0x20]:8 = sub_1e200 ( 0x1, &var_4:4 ); [x26_2 + 0x28]:8 = sub_1e200 ( 0x2, &var_4:4 ); [x26_2 + 0x30]:8 = sub_1e200 ( 0x3, &var_4:4 ); [x26_2 + 0x38]:8 = sub_1e200 ( 0x4, &var_4:4 ); [x26_2 + 0x40]:8 = sub_1e200 ( 0x5, &var_4:4 ); [x26_2 + 0x48]:8 = sub_1e200 ( 0x6, &var_4:4 ); [x26_2 + 0x18]:8 = sub_1e200 ( 0x7, &var_4:4 ); [x26_2 + 0x50]:8 = sub_1e200 ( 0x8, &var_4:4 ); [x26_2 + 0x58]:8 = sub_1e200 ( 0x9, &var_4:4 ); [x26_2 + 0x60]:8 = sub_1e200 ( 0xa, &var_4:4 ); x24_2 = x26_2; return x24_2;}sub_1de30 ( x26_2 );return 0x0; //----- (00001a8d) ----------------------------------------------------x24_1 = x8;x25_1 = x9;arg_a8:8 = x13;arg_a0:8 = x12;arg_98:8 = x11;arg_90:8 = x10;var_8_1:8 = &arg_90:8;x26_3 = sub_1e0a0 ( x25_1 );x29_2 = x26_3 + 0x1;x8_4 = sub_1e0b0 ( x29_2 );x27_2 = x8_4;sub_1e0c0 ( x8_4, 0x0, x29_2 );sub_1e0d0 ( x27_2, x25_1, x26_3 );w17_1 = 0x0;if (x26_3 != 0x0) { x19_1 = x26_3 + 0xffffffffffffffff; x3_1 = 0x0; x17_2 = 0x0; w17_1 = 0x0; x22_1 = 0x0; w22_1 = 0x0; x16_1 = x24_1; x21_1 = x27_2; w23_1 = 0x0; do { w20_1 = zx.32([x27_2 + x3_1]:1); if (x3_1 == x19_1 || (x4_1 = x22_1, (w20_1 & 0xff) == 0x25)) { x29_6 = x27_2 + x3_1; x2_1 = x16_1; if (x21_1 != 0x0) { if (x3_1 != x19_1) { [x29_6]:4 = 0x0; } if (w23_1 == 0x2) { x2_3 = var_8_1:8; var_8_1:8 = x2_3 + 0x10; x8_9 = sub_1e110 ( x16_1, x21_1, [x2_3]:8, [x2_3 + 0x8]:8 ); goto @LABEL_1add0:0; } if (w23_1 != 0x1) { if (w23_1 == 0x0) { x8_9 = sub_1e110 ( x16_1, x21_1, 0x0 ); goto @LABEL_1add0:0; } } else { x2_2 = var_8_1:8; var_8_1:8 = x2_2 + 0x8; x8_9 = sub_1e110 ( x16_1, x21_1, [x2_2]:8 );LABEL_1add0:0: x17_2 = x8_9 + x17_2; w17_1 = x17_2.w17; } [x29_6]:4 = w20_1; x2_1 = x24_1 + zx.64(w17_1); if (x16_1 == 0x0) { x2_1 = 0x0; } } x3_7 = x22_1 + 0x1; x5_1 = zx.64(x3_7.w3); x16_1 = x2_1; x4_1 = x22_1; if (x26_3 u> x5_1) { w5_2 = zx.32([x25_1 + x5_1]:1); w23_1 = 0x0; x21_1 = x29_6; x16_1 = x2_1; x4_1 = x3_7; if (w5_2 != 0x25) { if (w5_2 == 0x2a || w5_2 == 0x2e && (x3_9 = zx.64(w22_1) + 0x2, x26_3 u> 
x3_9) && zx.32([x25_1 + x3_9]:1) == 0x2a) { w23_1 = 0x2; } else { w23_1 = 0x1; } x21_1 = x29_6; x16_1 = x2_1; x4_1 = x22_1; } } } x22_1 = x4_1 + 0x1; w22_1 = x22_1.w22; x3_1 = zx.64(w22_1); } while ( x3_1 u< x26_3 );}sub_1e0f0 ( x27_2 );return zx.64(w17_1);

看雪培训ollvm9 sub_fcb4反混淆效果(更复杂了?)。
w10_1 = w1 - 0x2;[x0 + 0x17]:1 = ([x0 + 0x18]:1).b;t42_1:4 = 0x8ed87f2a;if (0x0 s>= w10_1) { t42_1:4 = 0xa0504942;}w22_5 = t42_1:4;if (w22_5 s<= 0xa0504941 && w22_5 s> 0x8ed87f29 && w22_5 == 0x8ed87f2a) { w20_1 = [x0 + 0x0]:1; [x0 + 0x0]:1 = (w20_1 ^ 0x1).b; t42_7:4 = 0x8ed87f2a; if (0x1 s>= w10_1) { t42_7:4 = 0xa0504942; } w22_22 = t42_7:4; if (w22_22 s<= 0xa0504941 && w22_22 s> 0x8ed87f29 && w22_22 == 0x8ed87f2a) { w20_3 = [x0 + 0x1]:1; [x0 + 0x1]:1 = (w20_3 ^ 0x1).b; w21_4 = (0xff ^ w20_1).b ^ w20_3; x11_3 = zx.64(0x0 + w20_1 + w20_3); t42_13:4 = 0x8ed87f2a; if (0x2 s>= w10_1) { t42_13:4 = 0xa0504942; } w22_39 = t42_13:4; if (w22_39 s<= 0xa0504941 && w22_39 s> 0x8ed87f29 && w22_39 == 0x8ed87f2a) { w20_5 = [x0 + 0x2]:1; [x0 + 0x2]:1 = (w20_5 ^ 0x1).b; t42_19:4 = 0x8ed87f2a; if (0x3 s>= w10_1) { t42_19:4 = 0xa0504942; } w22_56 = t42_19:4; if (w22_56 s<= 0xa0504941 && w22_56 s> 0x8ed87f29 && w22_56 == 0x8ed87f2a) { w20_7 = [x0 + 0x3]:1; w21_7 = (w21_4.b ^ w20_5).b; [x0 + 0x3]:1 = (w20_7 ^ 0x1).b; t31_4:4 = x11_3.w11 + w20_5 + w20_7; t42_25:4 = 0x8ed87f2a; if (0x4 s>= w10_1) { t42_25:4 = 0xa0504942; } w22_73 = t42_25:4; if (w22_73 s<= 0xa0504941 && w22_73 s> 0x8ed87f29 && w22_73 == 0x8ed87f2a) { w20_9 = [x0 + 0x4]:1; [x0 + 0x4]:1 = (w20_9 ^ 0x1).b; w21_10 = (w21_7 ^ w20_7).b ^ w20_9; t42_31:4 = 0x8ed87f2a; if (0x5 s>= w10_1) { t42_31:4 = 0xa0504942; } w22_90 = t42_31:4; if (w22_90 s<= 0xa0504941 && w22_90 s> 0x8ed87f29 && w22_90 == 0x8ed87f2a) { w20_11 = [x0 + 0x5]:1; t12_47:4 = t31_4:4 + w20_9; [x0 + 0x5]:1 = (w20_11 ^ 0x1).b; t42_37:4 = 0x8ed87f2a; if (0x6 s>= w10_1) { t42_37:4 = 0xa0504942; } w22_107 = t42_37:4; if (w22_107 s<= 0xa0504941 && w22_107 s> 0x8ed87f29 && w22_107 == 0x8ed87f2a) { w20_13 = [x0 + 0x6]:1; w21_13 = (w21_10.b ^ w20_11).b; [x0 + 0x6]:1 = (w20_13 ^ 0x1).b; x20_14 = zx.64(t12_47:4 + w20_11 + w20_13); t42_43:4 = 0x8ed87f2a; if (0x7 s>= w10_1) { t42_43:4 = 0xa0504942; } w22_124 = t42_43:4; if (w22_124 s<= 0xa0504941 && w22_124 s> 0x8ed87f29 && 
w22_124 == 0x8ed87f2a) { w20_15 = [x0 + 0x7]:1; [x0 + 0x7]:1 = (w20_15 ^ 0x1).b; w21_16 = (w21_13 ^ w20_13).b ^ w20_15; t42_49:4 = 0x8ed87f2a; if (0x8 s>= w10_1) { t42_49:4 = 0xa0504942; } w22_141 = t42_49:4; if (w22_141 s<= 0xa0504941 && w22_141 s> 0x8ed87f29 && w22_141 == 0x8ed87f2a) { [x0 + 0x8]:1 = 0x2d; w20_18 = x20_14.w20 + w20_15; t42_58:4 = 0x8ed87f2a; if (0x9 s>= w10_1) { t42_58:4 = 0xa0504942; } w22_161 = t42_58:4; if (w22_161 s<= 0xa0504941 && w22_161 s> 0x8ed87f29 && w22_161 == 0x8ed87f2a) { w20_19 = [x0 + 0x9]:1; [x0 + 0x9]:1 = (w20_19 ^ 0x1).b; w21_19 = w21_16.b ^ w20_19; t42_64:4 = 0x8ed87f2a; if (0xa s>= w10_1) { t42_64:4 = 0xa0504942; } w22_178 = t42_64:4; if (w22_178 s<= 0xa0504941 && w22_178 s> 0x8ed87f29 && w22_178 == 0x8ed87f2a) { w20_21 = [x0 + 0xa]:1; w23_44 = w20_18 + w20_19; [x0 + 0xa]:1 = (w20_21 ^ 0x1).b; t42_70:4 = 0x8ed87f2a; if (0xb s>= w10_1) { t42_70:4 = 0xa0504942; } w22_195 = t42_70:4; if (w22_195 s<= 0xa0504941 && w22_195 s> 0x8ed87f29 && w22_195 == 0x8ed87f2a) { w20_23 = [x0 + 0xb]:1; w21_22 = (w21_19.b ^ w20_21).b; [x0 + 0xb]:1 = (w20_23 ^ 0x1).b; x11_13 = zx.64(w23_44 + w20_21 + w20_23); t42_76:4 = 0x8ed87f2a; if (0xc s>= w10_1) { t42_76:4 = 0xa0504942; } w22_212 = t42_76:4; if (w22_212 s<= 0xa0504941 && w22_212 s> 0x8ed87f29 && w22_212 == 0x8ed87f2a) { w20_25 = [x0 + 0xc]:1; [x0 + 0xc]:1 = (w20_25 ^ 0x1).b; w21_25 = (w21_22 ^ w20_23).b ^ w20_25; t42_82:4 = 0x8ed87f2a; if (0xd s>= w10_1) { t42_82:4 = 0xa0504942; } w22_229 = t42_82:4; if (w22_229 s<= 0xa0504941 && w22_229 s> 0x8ed87f29 && w22_229 == 0x8ed87f2a) { [x0 + 0xd]:1 = 0x2d; t42_91:4 = 0x8ed87f2a; if (0xe s>= w10_1) { t42_91:4 = 0xa0504942; } w22_249 = t42_91:4; if (w22_249 s<= 0xa0504941 && w22_249 s> 0x8ed87f29 && w22_249 == 0x8ed87f2a) { [x0 + 0xe]:1 = 0x34; t12_120:4 = x11_13.w11 + w20_25; t42_103:4 = 0x8ed87f2a; if (0xf s>= w10_1) { t42_103:4 = 0xa0504942; } w22_271 = t42_103:4; if (w22_271 s<= 0xa0504941 && w22_271 s> 0x8ed87f29 && w22_271 == 0x8ed87f2a) { w20_32 
= [x0 + 0xf]:1; t4_92:1 = w21_25.b; [x0 + 0xf]:1 = (w20_32 ^ 0x1).b; t42_109:4 = 0x8ed87f2a; if (0x10 s>= w10_1) { t42_109:4 = 0xa0504942; } w22_288 = t42_109:4; if (w22_288 s<= 0xa0504941 && w22_288 s> 0x8ed87f29 && w22_288 == 0x8ed87f2a) { w20_34 = [x0 + 0x10]:1; t12_136:4 = t12_120:4 + w20_32; [x0 + 0x10]:1 = (w20_34 ^ 0x1).b; t29_14:4 = (t4_92:1 ^ w20_32).b ^ w20_34; t42_115:4 = 0x8ed87f2a; if (0x11 s>= w10_1) { t42_115:4 = 0xa0504942; } w22_305 = t42_115:4; if (w22_305 s<= 0xa0504941 && w22_305 s> 0x8ed87f29 && w22_305 == 0x8ed87f2a) { w20_36 = [x0 + 0x11]:1; [x0 + 0x11]:1 = (w20_36 ^ 0x1).b; x20_37 = zx.64(t12_136:4 + w20_34 + w20_36); t42_121:4 = 0x8ed87f2a; if (0x12 s>= w10_1) { t42_121:4 = 0xa0504942; } w22_322 = t42_121:4; if (w22_322 s<= 0xa0504941 && w22_322 s> 0x8ed87f29 && w22_322 == 0x8ed87f2a) { [x0 + 0x12]:1 = 0x2d; t4_110:1 = (t29_14:4.b ^ w20_36).b; t42_130:4 = 0x8ed87f2a; if (0x13 s>= w10_1) { t42_130:4 = 0xa0504942; } w22_342 = t42_130:4; if (w22_342 s<= 0xa0504941 && w22_342 s> 0x8ed87f29 && w22_342 == 0x8ed87f2a) { w20_40 = [x0 + 0x13]:1; [x0 + 0x13]:1 = (w20_40 ^ 0x1).b; w20_41 = x20_37.w20 + w20_40; t42_136:4 = 0x8ed87f2a; if (0x14 s>= w10_1) { t42_136:4 = 0xa0504942; } w22_359 = t42_136:4; if (w22_359 s<= 0xa0504941 && w22_359 s> 0x8ed87f29 && w22_359 == 0x8ed87f2a) { w20_42 = [x0 + 0x14]:1; t4_120:1 = (t4_110:1.b ^ w20_40).b; [x0 + 0x14]:1 = (w20_42 ^ 0x1).b; t42_142:4 = 0x8ed87f2a; if (0x15 s>= w10_1) { t42_142:4 = 0xa0504942; } w22_376 = t42_142:4; if (w22_376 s<= 0xa0504941 && w22_376 s> 0x8ed87f29 && w22_376 == 0x8ed87f2a) { w20_44 = [x0 + 0x15]:1; w23_88 = w20_41 + w20_42; [x0 + 0x15]:1 = (w20_44 ^ 0x1).b; t29_18:4 = (t4_120:1 ^ w20_42).b ^ w20_44; t42_148:4 = 0x8ed87f2a; if (0x16 s>= w10_1) { t42_148:4 = 0xa0504942; } w22_393 = t42_148:4; if (w22_393 s<= 0xa0504941 && w22_393 s> 0x8ed87f29 && w22_393 == 0x8ed87f2a) { w20_46 = [x0 + 0x16]:1; [x0 + 0x16]:1 = (w20_46 ^ 0x1).b; x11_24 = zx.64(w23_88 + w20_44 + w20_46); t42_154:4 = 
0x8ed87f2a; if (0x17 s>= w10_1) { t42_154:4 = 0xa0504942; } w22_410 = t42_154:4; if (w22_410 s<= 0xa0504941 && w22_410 s> 0x8ed87f29 && w22_410 == 0x8ed87f2a) { w20_48 = [x0 + 0x17]:1; t4_135:1 = (t29_18:4.b ^ w20_46).b; [x0 + 0x17]:1 = (w20_48 ^ 0x1).b; t42_160:4 = 0x8ed87f2a; if (0x18 s>= w10_1) { t42_160:4 = 0xa0504942; } w22_427 = t42_160:4; if (w22_427 s<= 0xa0504941 && w22_427 s> 0x8ed87f29 && w22_427 == 0x8ed87f2a) { [x0 + 0x18]:1 = 0x2d; t42_169:4 = 0x8ed87f2a; if (0x19 s>= w10_1) { t42_169:4 = 0xa0504942; } w22_447 = t42_169:4; if (w22_447 s<= 0xa0504941 && w22_447 s> 0x8ed87f29 && w22_447 == 0x8ed87f2a) { w20_52 = [x0 + 0x19]:1; t4_148:1 = (t4_135:1 ^ w20_48).b; t12_208:4 = x11_24.w11 + w20_48; [x0 + 0x19]:1 = (w20_52 ^ 0x1).b; t42_175:4 = 0x8ed87f2a; if (0x1a s>= w10_1) { t42_175:4 = 0xa0504942; } w22_464 = t42_175:4; if (w22_464 s<= 0xa0504941 && w22_464 s> 0x8ed87f29 && w22_464 == 0x8ed87f2a) { w20_54 = [x0 + 0x1a]:1; [x0 + 0x1a]:1 = (w20_54 ^ 0x1).b; t29_22:4 = (t4_148:1 ^ w20_52).b ^ w20_54; x20_55 = zx.64(t12_208:4 + w20_52 + w20_54); t42_181:4 = 0x8ed87f2a; if (0x1b s>= w10_1) { t42_181:4 = 0xa0504942; } w22_481 = t42_181:4; if (w22_481 s<= 0xa0504941 && w22_481 s> 0x8ed87f29 && w22_481 == 0x8ed87f2a) { w20_56 = [x0 + 0x1b]:1; [x0 + 0x1b]:1 = (w20_56 ^ 0x1).b; t42_187:4 = 0x8ed87f2a; if (0x1c s>= w10_1) { t42_187:4 = 0xa0504942; } w22_498 = t42_187:4; if (w22_498 s<= 0xa0504941 && w22_498 s> 0x8ed87f29 && w22_498 == 0x8ed87f2a) { w20_58 = [x0 + 0x1c]:1; t4_163:1 = (t29_22:4.b ^ w20_56).b; w23_116 = x20_55.w20 + w20_56; [x0 + 0x1c]:1 = (w20_58 ^ 0x1).b; t42_193:4 = 0x8ed87f2a; if (0x1d s>= w10_1) { t42_193:4 = 0xa0504942; } w22_515 = t42_193:4; if (w22_515 s<= 0xa0504941 && w22_515 s> 0x8ed87f29 && w22_515 == 0x8ed87f2a) { w20_60 = [x0 + 0x1d]:1; [x0 + 0x1d]:1 = (w20_60 ^ 0x1).b; t29_25:4 = (t4_163:1 ^ w20_58).b ^ w20_60; x11_31 = zx.64(w23_116 + w20_58 + w20_60); t42_199:4 = 0x8ed87f2a; if (0x1e s>= w10_1) { t42_199:4 = 0xa0504942; } w22_532 = 
t42_199:4; if (w22_532 s<= 0xa0504941 && w22_532 s> 0x8ed87f29 && w22_532 == 0x8ed87f2a) { w20_62 = [x0 + 0x1e]:1; [x0 + 0x1e]:1 = (w20_62 ^ 0x1).b; t42_205:4 = 0x8ed87f2a; if (0x1f s>= w10_1) { t42_205:4 = 0xa0504942; } w22_549 = t42_205:4; if (w22_549 s<= 0xa0504941 && w22_549 s> 0x8ed87f29 && w22_549 == 0x8ed87f2a) { w20_64 = [x0 + 0x1f]:1; t4_178:1 = (t29_25:4.b ^ w20_62).b; [x0 + 0x1f]:1 = (w20_64 ^ 0x1).b; t31_27:4 = x11_31.w11 + w20_62 + w20_64; t42_211:4 = 0x8ed87f2a; if (0x20 s>= w10_1) { t42_211:4 = 0xa0504942; } w22_566 = t42_211:4; if (w22_566 s<= 0xa0504941 && w22_566 s> 0x8ed87f29 && w22_566 == 0x8ed87f2a) { w20_66 = [x0 + 0x20]:1; [x0 + 0x20]:1 = (w20_66 ^ 0x1).b; t29_28:4 = (t4_178:1 ^ w20_64).b ^ w20_66; t42_217:4 = 0x8ed87f2a; if (0x21 s>= w10_1) { t42_217:4 = 0xa0504942; } w22_583 = t42_217:4; if (w22_583 s<= 0xa0504941 && w22_583 s> 0x8ed87f29 && w22_583 == 0x8ed87f2a) { w20_68 = [x0 + 0x21]:1; t12_272:4 = t31_27:4 + w20_66; [x0 + 0x21]:1 = (w20_68 ^ 0x1).b; x1_35 = zx.64(t29_28:4.b ^ w20_68); var_34_109:4 = t12_272:4 + w20_68; t42_223:4 = 0x8ed87f2a; if (0x22 s>= w10_1) { t42_223:4 = 0xa0504942; } w22_600 = t42_223:4; if (w22_600 s<= 0xa0504941) { } if (w22_600 s> 0x6594a299) { } if (w22_600 == 0xa147cdcc) { } if (w22_600 == 0xaa4530e6) { } if (w22_600 != 0xa0504942) { } [x0 + 0x23]:1 = ([0x7e68e7b060 + (zx.64(x1_35.b) & 0xf)]:1).b; t37_1:8 = zx.64(var_34_109:4 - (var_34_109:4 & 0xfffffff0)); [x0 + 0x22]:1 = ([0x7e68e7b060 + t37_1:8]:1).b; return x0; } } } } } } } } } } } } } } } } } } } } } } } } } } } } } } } } }}

只提取出其中与内存访问相关的代码,可以更明显地看到循环被展开了。
[x0 + 0x17]:1 = ([x0 + 0x18]:1).b;w20_1 = [x0 + 0x0]:1;[x0 + 0x0]:1 = (w20_1 ^ 0x1).b;w20_3 = [x0 + 0x1]:1;[x0 + 0x1]:1 = (w20_3 ^ 0x1).b;w20_5 = [x0 + 0x2]:1;[x0 + 0x2]:1 = (w20_5 ^ 0x1).b;w20_7 = [x0 + 0x3]:1;[x0 + 0x3]:1 = (w20_7 ^ 0x1).b;w20_9 = [x0 + 0x4]:1;[x0 + 0x4]:1 = (w20_9 ^ 0x1).b;w20_11 = [x0 + 0x5]:1;[x0 + 0x5]:1 = (w20_11 ^ 0x1).b;w20_13 = [x0 + 0x6]:1;[x0 + 0x6]:1 = (w20_13 ^ 0x1).b;w20_15 = [x0 + 0x7]:1;[x0 + 0x7]:1 = (w20_15 ^ 0x1).b;[x0 + 0x8]:1 = 0x2d;w20_19 = [x0 + 0x9]:1;[x0 + 0x9]:1 = (w20_19 ^ 0x1).b;w20_21 = [x0 + 0xa]:1;[x0 + 0xa]:1 = (w20_21 ^ 0x1).b;w20_23 = [x0 + 0xb]:1;[x0 + 0xb]:1 = (w20_23 ^ 0x1).b;w20_25 = [x0 + 0xc]:1;[x0 + 0xc]:1 = (w20_25 ^ 0x1).b;[x0 + 0xd]:1 = 0x2d;[x0 + 0xe]:1 = 0x34;w20_32 = [x0 + 0xf]:1;[x0 + 0xf]:1 = (w20_32 ^ 0x1).b;w20_34 = [x0 + 0x10]:1;[x0 + 0x10]:1 = (w20_34 ^ 0x1).b;w20_36 = [x0 + 0x11]:1;[x0 + 0x11]:1 = (w20_36 ^ 0x1).b;[x0 + 0x12]:1 = 0x2d;w20_40 = [x0 + 0x13]:1;[x0 + 0x13]:1 = (w20_40 ^ 0x1).b;w20_42 = [x0 + 0x14]:1;[x0 + 0x14]:1 = (w20_42 ^ 0x1).b;w20_44 = [x0 + 0x15]:1;[x0 + 0x15]:1 = (w20_44 ^ 0x1).b;w20_46 = [x0 + 0x16]:1;[x0 + 0x16]:1 = (w20_46 ^ 0x1).b;w20_48 = [x0 + 0x17]:1;[x0 + 0x17]:1 = (w20_48 ^ 0x1).b;[x0 + 0x18]:1 = 0x2d;w20_52 = [x0 + 0x19]:1;[x0 + 0x19]:1 = (w20_52 ^ 0x1).b;w20_54 = [x0 + 0x1a]:1;[x0 + 0x1a]:1 = (w20_54 ^ 0x1).b;w20_56 = [x0 + 0x1b]:1;[x0 + 0x1b]:1 = (w20_56 ^ 0x1).b;w20_58 = [x0 + 0x1c]:1;[x0 + 0x1c]:1 = (w20_58 ^ 0x1).b;w20_60 = [x0 + 0x1d]:1;[x0 + 0x1d]:1 = (w20_60 ^ 0x1).b;w20_62 = [x0 + 0x1e]:1;[x0 + 0x1e]:1 = (w20_62 ^ 0x1).b;w20_64 = [x0 + 0x1f]:1;[x0 + 0x1f]:1 = (w20_64 ^ 0x1).b;w20_66 = [x0 + 0x20]:1;[x0 + 0x20]:1 = (w20_66 ^ 0x1).b;w20_68 = [x0 + 0x21]:1;[x0 + 0x21]:1 = (w20_68 ^ 0x1).b;[x0 + 0x23]:1 = ([0x7e68e7b060 + (zx.64(x1_35.b) & 0xf)]:1).b;[x0 + 0x22]:1 = ([0x7e68e7b060 + t37_1:8]:1).b;

最后再附上我使用完整CFG反编译doCommandNative和ollvm9 sub_fcb4的伪码。

doCommandNative:
//----- (00013124) ----------------------------------------------------var_60:8 = x28;var_58:8 = x27;var_50:8 = x26;var_48:8 = x25;var_40:8 = x24;var_38:8 = x23;var_30:8 = x22;var_28:8 = x21;var_20:8 = x20;var_18:8 = x19;var_10:8 = x29;var_8:8 = x30;x27_1 = tpidr_el0;var_68:8 = [x27_1 + 0x28]:8;w20_1 = w2;t10_1:4 = [0x13194]:4;x25_1 = sx.64([0x141450]:1 + 0xb6);var_78_1:8 = x0;var_6c:4 = 0x0;x22 = malloc_0 ( 0x20 );w13 = x22 == 0x0;w23_1 = (sx.64(w20_1) * 0x68db8bad >>s 0x2c).w + (sx.64(w20_1) * 0x68db8bad >> 0x3f).w;x9_9 = sx.64(w20_1 - w23_1 * 0x2710) * 0x51eb851f;w15_2 = 0xea;w8_6 = 0xb5;w19_1 = 0x72;x14_2 = sx.64(w20_1) * 0x51eb851f;w24_1 = (x9_9 >>s 0x25).w + (x9_9 >> 0x3f).w;LOOP_13238:0:if (w8_6 != 0xda) { x26_1 = x25_1; while ( w19_1 != 0x92 ) { while ( 0x1 ) { if (w15_2 == 0x0) { free_0 ( x22 ); x14_2 = x14_2; w24_1 = w24_1; x27_1 = x27_1; w19_1 = w19_1; w23_1 = w23_1; w15_2 = w15_2; w20_1 = w20_1; x22 = x22; w13 = w13; if (x26_1 != 0x0 || (w8_6 = 0xda, x25_1 = x26_1, var_6c:4 == 0x0)) { w8_6 = 0xb5; w19_1 = 0x92; x25_1 = x26_1; } goto @LOOP_13238:0; } if ((w13 & 0x1) == 0x0) { x2_2 = zx.64(w20_1 - ((x14_2 >>s 0x25).w + (x14_2 >> 0x3f).w) * 0x64); t29_1:8 = x22; [t29_1:8]:8 = 0x0; [t29_1:8 + 0x8]:8 = 0x0; t29_2:8 = x22; [t29_2:8]:4 = w23_1; [t29_2:8 + 0x4]:4 = w24_1; [x22 + 0x8]:4 = x2_2.w2; [x22 + 0x10]:8 = var_78_1:8; [x22 + 0x18]:8 = x3; x14_2 = x14_2; w13 = w13; x26_1 = sub_fc18 ( zx.64(w23_1), zx.64(w24_1), x2_2, 0x1, x22, &var_6c:4 ); w15_2 = 0x0; continue; } x26_1 = 0x0; w15_2 = 0x0; w19_1 = 0x72; break; } }} else { sub_d0dcc ( var_78_1:8 );}return x25_1;

sub_fcb4:
//----- (0000fcb4) ----------------------------------------------------w12_1 = 0x0;w11_1 = 0x0;x1_1 = zx.64(0xff);w1_1 = 0xff;[x0 + 0x17]:1 = ([x0 + 0x18]:1).b;while ( 0x1 ) { w22_3 = 0x6cdff6c3; while ( 0x1 ) { if (w22_3 s<= 0xa0504941) { if (w22_3 s<= 0x8ed87f29) { if (w22_3 != 0x800157d9) { if (w22_3 == 0x8e17425b) { w22_11 = var_48:1 | var_46:1; t25_4:4 = 0xaa4530e6; if (((w22_11 | var_45:1) & 0x1) == 0x0) { t25_4:4 = 0x800157d9; } w22_3 = t25_4:4; } } else { w20 = var_34:4; w21 = var_44:1; t25_7:4 = 0xa147cdcc; if (var_47:1 == 0x0) { t25_7:4 = 0x6594a29a; } w22_3 = t25_7:4; } } else { if (w22_3 != 0x8ed87f2a) { if (w22_3 == 0x8f61219b) { x22_15 = sx.64(var_38:4); w20_7 = [x0 + x22_15]:1; [x0 + x22_15]:1 = (w20_7 ^ 0x1).b; w21 = var_44:1 ^ w20_7; w20 = var_34:4 + w20_7; w22_3 = 0x6594a29a; } } else { w23_5 = var_38:4 == 0xd; var_48:1 = (var_38:4 == 0x8 | w23_5).b; var_47:1 = (var_38:4 == 0xe).b; var_46:1 = (var_38:4 == 0x12).b; w24_6 = var_46:1; t22_5:4 = var_48:1 | var_47:1 | w24_6; var_45:1 = (var_38:4 == 0x18).b; t25_10:4 = 0x8e17425b; if (((t22_5:4 | var_45:1) & 0x1) == 0x0) { t25_10:4 = 0x8f61219b; } w22_3 = t25_10:4; } } continue; } if (w22_3 s> 0x6594a299) { if (w22_3 == 0x6594a29a) { break; } if (w22_3 == 0x6cdff6c3) { var_44:1 = w1_1.b; var_38:4 = w12_1; var_34:4 = w11_1; t25_1:4 = 0x8ed87f2a; if (var_38:4 s>= w1 - 0x2) { t25_1:4 = 0xa0504942; } w22_3 = t25_1:4; } continue; } if (w22_3 != 0xa147cdcc) { if (w22_3 != 0xaa4530e6) { if (w22_3 != 0xa0504942) { continue; } [x0 + 0x23]:1 = ([0x37060 + (zx.64(var_44:1) & 0xf)]:1).b; t45_1:8 = zx.64(var_34:4 - (var_34:4 & 0xfffffff0)); [x0 + 0x22]:1 = ([0x37060 + t45_1:8]:1).b; return x0; } [x0 + sx.64(var_38:4)]:1 = 0x2d; } else { [x0 + sx.64(var_38:4)]:1 = 0x34; } w20 = var_34:4; w21 = var_44:1; w22_3 = 0x6594a29a; } x1_1 = zx.64(w21); w1_1 = x1_1.w1; w11_1 = w20; w12_1 = var_38:4 + 0x1;}

- End -




看雪ID:krash

https://bbs.pediy.com/user-home-240967.htm

  *本文由看雪论坛 krash 原创,转载请注明来自看雪社区。



《安卓高级研修班》2021年6月班火热招生中!


# 往期推荐





公众号ID:ikanxue
官方微博:看雪安全
商务合作:wsc@kanxue.com



球分享

球点赞

球在看



点击“阅读原文”,了解更多!

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存